library(janitor) # Simple Tools for Examining and Cleaning Dirty Data CRAN v2.2.0
library(glue) # Interpreted String Literals CRAN v1.7.0
library(broom) # Convert Statistical Objects into Tidy Tibbles CRAN v1.0.5
library(fixest) # Fast Fixed-Effects Estimations CRAN v0.11.2
library(randomForest) # Breiman and Cutler's Random Forests for Classification and Regression CRAN v4.7-1.1
library(palmer) # Maxwell Palmer's Personal R Package [github::maxwellpalmer/palmer] v0.0.0.4
library(tidyverse) # Easily Install and Load the 'Tidyverse' CRAN v2.0.0
options(dplyr.summarise.inform = FALSE)

town.data <- read_rds("town_data.rds")
units <- read_rds("hnma_units.rds")

# Figure 2 ----------------------------------------------------------------

d1 <- units %>% summarise(p=sum(units[rent=="Rent % of Indiv. Income"])/sum(units), units=sum(units), .by=city) %>%
  mutate(lab="Rent is % of Individual Income")
d2 <- units %>% summarise(p=sum(units[br2=="2+ BR"])/sum(units), units=sum(units), .by=city) %>%
  mutate(lab="2+ Bedrooms")
d3 <- units %>% summarise(p=sum(units[age=="Age Res."])/sum(units), units=sum(units), .by=city) %>%
  mutate(lab="Age Restrictions")

bind_rows(d1, d2, d3) %>%
  mutate(lab=factor(lab, levels=unique(lab))) %>%
  arrange(lab, p, desc(units)) %>%
  mutate(x=row_number(), .by=lab) %>%
  arrange(desc(units)) %>%
  ggplot(aes(x=x, y=p, size=units)) +
  geom_point(pch=21, color="white", fill="gray30", stroke=.1, show.legend = F) +
  facet_wrap(~lab, ncol=1) +
  #guides(fill = guide_legend(override.aes = list(size = 3))) +
  scale_size_continuous(range=c(1, 10), guide=NULL) +
  scale_x_discrete(expand=c(0,1)) +
  scale_y_continuous(labels=scales::percent_format(), limits=c(0, 1), expand=c(0,.02)) +
  theme_mp(grid="minor.y") +
  labs(x=NULL, y="Percent of Units in Municipality") +
  theme(legend.title=element_blank(),
        legend.position = c(1,0),
        legend.justification = c(1,0),
        axis.text.x=element_blank())
ggsave("distributions_of_key_variables_final.pdf", width=6, height=8, device=cairo_pdf)
ggsave("distributions_of_key_variables_final.png", width=6, height=8)


# Figure 3 ----------------------------------------------------------------

town.data %>%
  mutate(units_per_hu=units/housing_units) %>%
  arrange(desc(median_housing_price)) %>%
  ggplot(aes(x=median_housing_price/1000, y=units_per_hu)) +
  geom_point(aes(size=units), show.legend = F, pch=21, color="white", fill="gray30") +
  geom_text(aes(label=ifelse(city %in% c("Boston", "Cambridge", "Springfield", "Holyoke", "Chelsea", "Lawrence",
                                         "Weston", "Wellesley", "Nantucket", "Brookline", "Lincoln", "Newton", "Brockton",
                                         "Winchester"), city, NA), x=ifelse(city=="Boston", median_housing_price/1000+30, median_housing_price/1000)), 
            hjust=0, size=2, nudge_x=20) +
  scale_size_continuous(range=c(1, 10)) +
  scale_x_continuous(labels=scales::comma_format(), limits=c(0, 1550), breaks=seq(0, 1500, 250), expand=c(0, 10)) +
  scale_y_continuous(labels=scales::percent_format(), limits=c(0, .175), breaks=seq(0, .2, .025), expand=c(0, 0.001)) +
  labs(x="Median Housing Price ($k)", y="Subsidized Housing Units / Total Housing Units ") +
  theme_mp() +
  theme(plot.margin = margin(10, 10, 10, 10))
ggsave("housing_units_and_prices_final.pdf", width=6, height=4, device=cairo_pdf)
ggsave("housing_units_and_prices_final.png", width=6, height=4)

# Figure 4 ----------------------------------------------------------------

town.data %>%
  arrange(desc(median_housing_price)) %>%
  ggplot(aes(x=median_housing_price/1000, y=rent_indiv_units)) +
  geom_point(aes(size=units), show.legend = F, pch=21, color="white", fill="gray30") +
  scale_size_continuous(range=c(1, 10)) +
  scale_x_continuous(labels=scales::comma_format(), limits=c(0, 1550), breaks=seq(0, 1500, 250), expand=c(0, 10)) +
  scale_y_continuous(labels=scales::percent_format(), limits=c(0, 1), expand=c(0, 0.01)) +
  labs(x="Median Housing Price ($k)", y="Subsidized Units Where Rent is Based on Income / \nTotal Subsidized Housing Units ") +
  theme_mp() +
  theme(plot.margin = margin(10, 10, 10, 10))
ggsave("pct_of_income_units_final.pdf", width=6, height=4, device=cairo_pdf)
ggsave("pct_of_income_units_final.png", width=6, height=4)


# Figure 5 ----------------------------------------------------------------

# Note: Figure made by hand using below calculation.

units %>% mutate(cat=case_when(age=="Age Res." ~ "Age Res.",
                               T ~ br2)) %>%
  summarise(units=sum(units), .by=c(rent, cat)) %>%
  mutate(p=units/sum(units))


# Figure 6 ----------------------------------------------------------------

town.data %>%
  arrange(desc(median_housing_price)) %>%
  ggplot(aes(x=median_housing_price/1000, y=age_rest_units)) +
  geom_point(aes(size=units), show.legend = F, pch=21, color="white", fill="gray30") +
  scale_size_continuous(range=c(1, 10)) +
  scale_x_continuous(labels=scales::comma_format(), limits=c(0, 1550), breaks=seq(0, 1500, 250), expand=c(0, 10)) +
  scale_y_continuous(labels=scales::percent_format(), limits=c(0, 1), expand=c(0, 0.01)) +
  labs(x="Median Housing Price ($k)", y="Percent of Subsidized Units With Age Restrictions") +
  theme_mp() +
  theme(plot.margin = margin(10, 10, 10, 10))
ggsave("pct_of_age_units_final.pdf", width=6, height=4, device=cairo_pdf)
ggsave("pct_of_age_units_final.png", width=6, height=4)

# Figure 7 ----------------------------------------------------------------

town.data %>%
  arrange(desc(median_housing_price)) %>%
  ggplot(aes(x=pct_white, y=age_rest_units)) +
  geom_smooth(method="lm", se=F, color="black", lwd=1) +
  geom_point(aes(size=units), show.legend = F, pch=21, color="white", fill="gray30") +
  scale_size_continuous(range=c(1, 10)) +
  scale_x_continuous(labels=scales::percent_format(), limits=c(0, 1), expand=c(0, 0.01)) +
  scale_y_continuous(labels=scales::percent_format(), limits=c(0, 1), expand=c(0, 0.01)) +
  labs(x="Percent White", y="Percent of Subsidized Units With Age Restrictions") +
  theme_mp() +
  theme(plot.margin = margin(10, 10, 10, 10))
ggsave("age_race_final.pdf", width=6, height=4, device=cairo_pdf)
ggsave("age_race_final.png", width=6, height=4)


# Table 1 -----------------------------------------------------------------

m1 <- feols(age_rest_units ~ median_hh_income + pct_white, data=town.data)
m2 <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors, data=town.data)
m3 <- feols(age_rest_units ~ median_hh_income + pct_white + pct_below_poverty, data=town.data)
m4 <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors_below_poverty, data=town.data)
m5 <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors_of_poverty, data=town.data)
m6 <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors + pct_below_poverty + pct_seniors_below_poverty + pct_seniors_of_poverty, data=town.data)

setFixest_dict(c("age_rest_units"="% Age Restricted Units",
                 "median_hh_income"="Median HH Income ($k)", 
                 "pct_white"="% White",
                 "pct_seniors"="% Aged 60+",
                 "pct_below_poverty"="% Below Poverty Level",
                 "pct_seniors_below_poverty"="% Seniors Below Poverty Level",
                 "pct_seniors_of_poverty"="% Below Poverty Level Who Are Seniors"))
etable(m1, m2, m3, m4, m5, m6)
etable(m1, m2, m3, m4, m5, m6, tex=T) %>% writeLines("reg1_final.tex")


# Models with Neighboring Towns (not reported) ----------------------------

m1n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_white.n, data=town.data)
m2n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_black.n, data=town.data)
m3n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors.n, data=town.data)
m4n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_black.n + pct_seniors.n, data=town.data)
m5n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_below_poverty.n, data=town.data)
m6n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors_below_poverty.n, data=town.data)
m7n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors_of_poverty.n, data=town.data)
m8n <- feols(age_rest_units ~ median_hh_income + pct_white + pct_seniors + pct_black.n + pct_seniors.n + pct_below_poverty.n + pct_seniors_below_poverty.n + pct_seniors_of_poverty.n, data=town.data)

etable(m1n, m2n, m3n, m4n, m5n, m6n, m7n, m8n)

# Random Forest Test (not reported) ---------------------------------------

rf.dat <- town.data %>% select(city, age_rest_units, contains("pct")) %>% na.omit()
rf = randomForest(x = rf.dat[-(1:2)], 
                             y = rf.dat$age_rest_units, 
                             ntree = 1000) 
importance(rf)
varImpPlot(rf) 
